{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "53af08cf-0149-4369-93d6-fba2203ec6cc",
   "metadata": {
    "papermill": {
     "duration": 0.001958,
     "end_time": "2022-10-26T08:38:11.044750",
     "exception": false,
     "start_time": "2022-10-26T08:38:11.042792",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# output-upload-to-cos"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "360d1180",
   "metadata": {},
   "source": [
    "Uploads files matching a glob pattern to any S3-compliant Cloud Object Storage."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05a6d935-38c5-49df-9df2-fbc8c31c4634",
   "metadata": {
    "papermill": {
     "duration": 2.110786,
     "end_time": "2022-10-26T08:38:13.159053",
     "exception": false,
     "start_time": "2022-10-26T08:38:11.048267",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "#!pip install aiobotocore botocore s3fs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85859764-8fd2-4394-88d6-b3c54194b868",
   "metadata": {
    "papermill": {
     "duration": 0.790577,
     "end_time": "2022-10-26T08:38:13.951695",
     "exception": false,
     "start_time": "2022-10-26T08:38:13.161118",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "import sys\n",
    "import re\n",
    "import s3fs\n",
    "import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8069e66-fad7-4be9-bc6f-b9eaa119b3bc",
   "metadata": {
    "papermill": {
     "duration": 0.01422,
     "end_time": "2022-10-26T08:38:13.968669",
     "exception": false,
     "start_time": "2022-10-26T08:38:13.954449",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# target in format: cos://access_key_id:secret_access_key@endpoint/bucket/path\n",
    "target = os.environ.get('target')\n",
    "\n",
    "# source folder and file pattern (glob)\n",
    "source_file_pattern = os.environ.get('source_file_pattern')\n",
    "\n",
    "# find_recursive, if True, will search for files in subfolders specified in source_file_pattern. Default is True. Accepts true/1/yes/y/on (case-insensitive); any other value disables recursion\n",
    "find_recursive = os.environ.get('find_recursive','True').strip().lower() in ('true', '1', 'yes', 'y', 'on')\n",
    "\n",
    "# process source file path on target using regex. Default is None\n",
    "process_target_file_pattern = os.environ.get('process_target_file_pattern',None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41952b78",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO externalize into library\n",
    "# Split a target of the form scheme://access_key_id:secret_access_key@endpoint/bucket/path\n",
    "# into access_key_id, secret_access_key and endpoint; target is then replaced by bucket/path.\n",
    "if target is None:\n",
    "    raise ValueError('environment variable target is not set')\n",
    "if target.startswith('cos') or target.startswith('s3'):\n",
    "    buffer=target.split('://', 1)[1]\n",
    "    # the error text describes the expected format only, so credentials never end up in a traceback message\n",
    "    if '@' not in buffer or ':' not in buffer.rsplit('@', 1)[0]:\n",
    "        raise ValueError('target must have the format cos://access_key_id:secret_access_key@endpoint/bucket/path')\n",
    "    # the last '@' ends the credentials; only the first ':' separates key id from secret,\n",
    "    # so a secret containing ':' or '@' is kept intact\n",
    "    credentials, location = buffer.rsplit('@', 1)\n",
    "    access_key_id, secret_access_key = credentials.split(':', 1)\n",
    "    endpoint=location.split('/')[0]\n",
    "    target='/'.join(location.split('/')[1:])\n",
    "\n",
    "\n",
    "    logging.debug(f'access_key_id={access_key_id}')\n",
    "    # never write the secret itself to the log\n",
    "    logging.debug('secret_access_key=***')\n",
    "    logging.debug(f'endpoint={endpoint}')\n",
    "    logging.debug(f'target={target}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "704117f6-702d-43ee-9092-201c46b31e15",
   "metadata": {
    "papermill": {
     "duration": 0.014524,
     "end_time": "2022-10-26T08:38:14.006336",
     "exception": false,
     "start_time": "2022-10-26T08:38:13.991812",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Non-anonymous filesystem client for the parsed endpoint, addressed via an https:// URL.\n",
    "s3 = s3fs.S3FileSystem(\n",
    "    key=access_key_id,\n",
    "    secret=secret_access_key,\n",
    "    anon=False,\n",
    "    client_kwargs=dict(endpoint_url=f'https://{endpoint}'),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60436d8a-f461-4723-abeb-cc22b555c782",
   "metadata": {
    "papermill": {
     "duration": 1.715267,
     "end_time": "2022-10-26T08:38:15.723556",
     "exception": false,
     "start_time": "2022-10-26T08:38:14.008289",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Copy every local path matched by the glob pattern to the target location.\n",
    "for source_file in glob.glob(source_file_pattern, recursive=find_recursive):\n",
    "    if process_target_file_pattern is None:\n",
    "        # no rewrite requested: the remote path mirrors the local one\n",
    "        target_path = source_file\n",
    "    else:\n",
    "        # remove every match of the regex from the local path\n",
    "        target_path = re.sub(process_target_file_pattern, '', source_file)\n",
    "        logging.debug(f'Adjusting target path {source_file} to: {target_path}')\n",
    "    # target and path are concatenated as-is; each '//' is then collapsed to '/'\n",
    "    joined_destination = f'{target}{target_path}'\n",
    "    final_destination = joined_destination.replace('//','/')\n",
    "    logging.debug(f'Uploading {source_file} to {final_destination}')\n",
    "    s3.put(source_file, final_destination)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 6.522865,
   "end_time": "2022-10-26T08:38:16.157357",
   "environment_variables": {},
   "exception": null,
   "input_path": "/home/romeokienzler/gitco/claimed/component-library/output/upload-to-cos.ipynb",
   "output_path": "/home/romeokienzler/gitco/claimed/component-library/output/upload-to-cos.ipynb",
   "parameters": {},
   "start_time": "2022-10-26T08:38:09.634492",
   "version": "2.4.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
